library(tidyverse)
library(lubridate)
fix_emperors <- function(data) {
data %>%
mutate(
birth = case_when(
index %in% c(1, 2, 4, 6) ~ update(birth, year = -year(birth)),
TRUE ~ birth
),
reign_start = case_when(
index == 1 ~ update(reign_start, year = -year(reign_start)),
TRUE ~ reign_start
)
)
}
The first exercise uses a dataset about roman emperors from the tidytuesday project (link). You can import it with:
raw_emperors <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-08-13/emperors.csv")
emperors <- fix_emperors(raw_emperors)
Here are a couple of questions to answer. Decide for yourselves if a particular question is best answered using a visualization, a table or a simple sentence.
rising_to_power <- emperors %>%
count(rise, sort = TRUE)
rising_to_power %>%
mutate(rise = fct_reorder(rise, desc(n))) %>%
ggplot(aes(n, rise, fill = n)) +
geom_col() +
geom_text(aes(label = n), hjust = 1, color = "white") +
guides(fill = "none") +
theme_classic()
rising_to_power %>%
mutate(rise = fct_reorder(rise, desc(n))) %>%
ggplot(aes(rise, n, fill = n)) +
geom_col() +
geom_text(aes(label = n), hjust = 0.5, vjust = 0, color = "black",
fontface = "bold") +
guides(fill = "none") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
rising_to_power %>%
mutate(
rise = fct_reorder(rise, desc(n))
) %>%
arrange(n) %>%
mutate(text_position = cumsum(n),
text_position = text_position - c(0, diff(text_position)) / 2) %>%
ggplot(aes(x = 1, y = n, fill = rise)) +
geom_col(width = 0.4) +
geom_text(aes(label = n, y = text_position)) +
coord_polar(theta = "y") +
theme_void() +
lims(x = c(0, 2))
emperors %>%
count(cause, killer, sort = TRUE) %>%
ggplot(aes(cause, n, fill = killer)) +
geom_col() +
fishualize::scale_fill_fish_d()
most_common_cause <- emperors %>%
count(cause, sort = TRUE) %>%
pull(cause) %>%
head(1)
The most common cause was Assassination.
emperors %>%
count(dynasty, sort = TRUE)
## # A tibble: 8 x 2
## dynasty n
## <chr> <int>
## 1 Gordian 22
## 2 Constantinian 15
## 3 Severan 8
## 4 Nerva-Antonine 7
## 5 Flavian 6
## 6 Julio-Claudian 5
## 7 Valentinian 4
## 8 Theodosian 1
emperors %>%
mutate(reign = reign_end - reign_start) %>%
group_by(dynasty) %>%
summarise(
top_emperor_reign = max(reign),
romes_top_emperor = paste(name[reign == top_emperor_reign], collapse = " and "),
reign = sum(reign)
)
## # A tibble: 8 x 4
## dynasty top_emperor_reign romes_top_emperor reign
## <chr> <drtn> <chr> <drtn>
## 1 Constantinian 11259 days Constantine the Great 54307 days
## 2 Flavian 5483 days Domitian 10326 days
## 3 Gordian 5449 days Gallienus 21377 days
## 4 Julio-Claudian 14825 days Augustus 34445 days
## 5 Nerva-Antonine 8276 days Antonius Pius 39263 days
## 6 Severan 7036 days Caracalla 21363 days
## 7 Theodosian 5860 days Theodosius I 5860 days
## 8 Valentinian 6024 days Valentinian II 21418 days
mean_lifetime <- emperors %>%
mutate(life = death - birth) %>%
group_by(dynasty) %>%
summarise(life = mean(life, na.rm = TRUE))
new_emperors <- emperors %>%
mutate(life = death - birth)
lifetimes_plot <-
new_emperors %>%
ggplot(aes(dynasty, life)) +
geom_jitter(aes(color = cause, label = name), width = 0.1) +
geom_point(data = mean_lifetime, color = "red")
plotly::ggplotly(lifetimes_plot)
emperors %>%
mutate(life = death - birth) %>%
ggplot(aes(dynasty, life)) +
geom_boxplot()
Another dataset (link) concerns dairy product consumption per person in the US across a number of years. Load it with
raw_dairy <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/milk_products_facts.csv")
dairy <- raw_dairy %>%
pivot_longer(-year, names_to = "product",
values_to = "kg_pp") %>%
mutate(kg_pp = kg_pp * 0.4535924)
dairy %>%
ggplot(aes(year, kg_pp, color = product)) +
geom_line() +
geom_point() +
facet_wrap(~ product, scales = "free") +
guides(color = "none")